/*
 * Copyright 2010-2012 Susanta Tewari. <freecode4susant@users.sourceforge.net>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

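/**
 * Parses the file raw-data/genemaps_broad_raw.xml; the intended output is
 * release/genomemap/pubdata/genemaps_broad.xml, conforming to genemaps.xsd.
 *
 * NOTE: as written, the script only prints the parsed gene names for each
 * chromosome to standard output; it does not write the output file.
 */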

import org.dom4j.io.SAXReader

// Location of the raw gene-map XML, resolved against the working directory.
final String FILE_PATH = "raw-data/genemaps_broad_raw.xml"

// Read the XML once up front; `records` is the root element of the document.
def rawFile = new File(FILE_PATH)
def document = new SAXReader().read(rawFile)
def records = document.getRootElement()

// Not referenced by the visible part of this script.
def messages = []

/**
 * Collects one field from every "-D-" delimited record in the given text.
 *
 * The text is split on the literal marker "-D-". The chunk before the first
 * marker is discarded; each remaining chunk is split on commas and its
 * second-to-last field is collected.
 *
 * @param text raw text of one element; may be null or empty
 * @return the second-to-last comma-separated field of every record, in input
 *         order; an empty list when the text is null, empty, or has no record
 *         after the first "-D-" marker
 */
def parseData(String text) {

    if (!text) {
        return []
    }

    def chunks = text.split("-D-")

    // With fewer than two chunks the range 1..-1 resolves to an out-of-bounds
    // sub-list and throws, so "no records" is answered explicitly instead.
    if (chunks.size() < 2) {
        return []
    }

    // NOTE(review): a record with fewer than two comma-separated fields still
    // fails on the negative index below, as it did before; left unchanged so
    // that malformed input is not silently skipped.
    return chunks[1..-1].collect { record -> record.split(",")[-2] }
}

// Prints the chromosome label of one child element, then the genes parsed
// from its trimmed text, then a blank separator.
def printChromosome = { element ->
    def chr = element.attributeValue('chr')
    println "Chromosome ${chr}"

    def genes = parseData(element.getTextTrim())
    println genes

    println "\n"
}

// Visit every child element of the root in document order.
records.elementIterator().each(printChromosome)




